# PoP - Policing Socio-Geographic Boundaries and Inequality
# script for creating final dataset of police stops for Milwaukee.

# load packages
suppressPackageStartupMessages(
  
  {
    library(dplyr)
    library(tidyverse)
    library(ggplot2)
    library(haven)
    library(readxl)
    library(readr)
    library(areal)
    library(car)
    library(estimatr)
    library(magrittr)
    library(texreg)
    library(sandwich)
    library(jtools)
  }
)


# load final data set
load("mil_stops_19_22.RData")


# make binary numeric vars for key cols
table(mil_stops_19_22$Race)

# race of arrestee
mil_stops_19_22 = mil_stops_19_22 %>%
  mutate(r_black = ifelse(Race == "B", 1, 0)) %>%
  mutate(r_white = ifelse(Race == "W", 1, 0)) %>%
  mutate(r_hisp = ifelse(Race == "H", 1, 0)) %>%
  mutate(r_asian = ifelse(Race == "A", 1, 0)) %>%
  mutate(r_other = ifelse(Race == "I", 1, 0)) %>%
  mutate(r_nonwhite = ifelse(Race == "B" | Race == "H" | Race == "A" | Race == "I", 1, 0))

# if person was searched
table(mil_stops_19_22$summaryReasonDetail)

# mil_stops_19_22 = mil_stops_19_22 %>%
#   mutate(searched = ifelse(PERSON_SEARCH == "Yes = 1" | PERSON_SEARCH == "YES", 1, 0)) %>%
#   mutate(nothing_found = ifelse(SEARCH_FOUND == "NOTHING", 1, 0))

# code reason for stop
table(mil_stops_19_22$summaryReason)

# read in codes for stop type
stop_codes <- read_excel("Tracs_V2ReasonContact.xlsx")

# code for traffic stops
mil_stops_19_22$traffic <- ifelse(mil_stops_19_22$summaryReason == "TS01", 1, 0)


table(mil_stops_19_22$traffic)

# save
save(x = mil_stops_19_22, file = "mil_stops_19_22.RData")

# Turning data into geometric object
sum(is.na(mil_stops_19_22$latitude))
# 259 NAs for location coords

# remove NAs
mil_stops_geo = mil_stops_19_22 %>% filter(!is.na(latitude))
mil_stops_geo = mil_stops_geo %>% filter(!is.na(longitude))

class(mil_stops_geo$latitude)

# convert from character to numeric
mil_stops_geo$latitude <- as.numeric(mil_stops_geo$latitude)
mil_stops_geo$longitude <- as.numeric(mil_stops_geo$longitude)

# remove newly introduced NAs
mil_stops_geo = mil_stops_geo %>% filter(!is.na(latitude))
mil_stops_geo = mil_stops_geo %>% filter(!is.na(longitude))


# set X and Y coords
library(sf)
mil_stops_geo = mil_stops_geo %>% st_as_sf(coords = c("longitude", "latitude"))

# load in final mil dataset
load("mil_final.RData")
# check crs for final df
st_crs(mil_fin)
# EPSG:4269 on NAD83


# coords are on EPSG:2277 on State Plane Texas Central (NAD83); so set initial coords to that system  
st_crs(mil_stops_geo)
st_crs(mil_stops_geo) <- 4269

# then transform to EPSG:4269 (on NAD83) to match aus_fin
mil_stops_geo$geometry = st_transform(mil_stops_geo$geometry, "EPSG:4269")

# rpd_aus_geo$geometry = st_transform(rpd_aus_geo$geometry, "EPSG:4326")


# set CRS between ped stops data and mil_fin
st_crs(mil_stops_geo) = st_crs(mil_fin)
mil_stops_geo = st_transform(mil_stops_geo, st_crs(mil_fin))

mil_stops = mil_stops_geo


# find intersection between stop data and mil blk groups 
mil_ints = st_intersects(mil_stops, mil_fin)

# now filter by race of arrestee
mil_ints2a = st_intersects(mil_stops %>% filter(r_black == 1), mil_fin)
mil_ints2b = st_intersects(mil_stops %>% filter(r_hisp == 1), mil_fin)
mil_ints2c = st_intersects(mil_stops %>% filter(r_white == 1), mil_fin)
mil_ints2d = st_intersects(mil_stops %>% filter(r_asian == 1), mil_fin)
mil_ints2e = st_intersects(mil_stops %>% filter(r_nonwhite == 1), mil_fin)

# sum total stops by blk
theout = mil_fin[mil_ints %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>% 
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_total = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# sum total stops by race
theout2a = mil_fin[mil_ints2a %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_black = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2b = mil_fin[mil_ints2b %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_latino = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2c = mil_fin[mil_ints2c %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_white = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2d = mil_fin[mil_ints2d %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_asian = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2e = mil_fin[mil_ints2e %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_nonwhite = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# remerge with mil_fin

mil_fin2 = merge(mil_fin, theout, by = "BLK_CODE", all.x = TRUE)
mil_fin2 = merge(mil_fin2, theout2a, by = "BLK_CODE", all.x = TRUE)
mil_fin2 = merge(mil_fin2, theout2b, by = "BLK_CODE", all.x = TRUE)
mil_fin2 = merge(mil_fin2, theout2c, by = "BLK_CODE", all.x = TRUE)
mil_fin2 = merge(mil_fin2, theout2d, by = "BLK_CODE", all.x = TRUE)
mil_fin2 = merge(mil_fin2, theout2e, by = "BLK_CODE", all.x = TRUE)


sum(is.na(mil_fin2$all_stops_total))

# now filter by just pedestrian stops
# find intersection between stop data and miltin blk groups 
mil_ints2 = st_intersects(mil_stops %>% filter(traffic == 0), mil_fin)

# now filter by race of arrestee
mil_ints3a = st_intersects(mil_stops %>% filter(traffic == 0 & r_black == 1), mil_fin)
mil_ints3b = st_intersects(mil_stops %>% filter(traffic == 0 & r_hisp == 1), mil_fin)
mil_ints3c = st_intersects(mil_stops %>% filter(traffic == 0 & r_white == 1), mil_fin)
mil_ints3d = st_intersects(mil_stops %>% filter(traffic == 0 & r_asian == 1), mil_fin)
mil_ints3e = st_intersects(mil_stops %>% filter(traffic == 0 & r_nonwhite == 1), mil_fin)

# sum total stops by blk
theout2 = mil_fin[mil_ints2 %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>% 
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_total = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# sum total stops by race
theout3a = mil_fin[mil_ints3a %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_black = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3b = mil_fin[mil_ints3b %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_latino = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3c = mil_fin[mil_ints3c %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_white = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3d = mil_fin[mil_ints3d %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_asian = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3e = mil_fin[mil_ints3e %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_nonwhite = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# remerge with mil_fin

mil_fin3 = merge(mil_fin2, theout2, by = "BLK_CODE", all.x = TRUE)
mil_fin3 = merge(mil_fin3, theout3a, by = "BLK_CODE", all.x = TRUE)
mil_fin3 = merge(mil_fin3, theout3b, by = "BLK_CODE", all.x = TRUE)
mil_fin3 = merge(mil_fin3, theout3c, by = "BLK_CODE", all.x = TRUE)
mil_fin3 = merge(mil_fin3, theout3d, by = "BLK_CODE", all.x = TRUE)
mil_fin3 = merge(mil_fin3, theout3e, by = "BLK_CODE", all.x = TRUE)


# convert NAs to 0s
# all stops
mil_fin3$all_stops_total <- ifelse(is.na(mil_fin3$all_stops_total), 0, mil_fin3$all_stops_total)
mil_fin3$all_stops_black <- ifelse(is.na(mil_fin3$all_stops_black), 0, mil_fin3$all_stops_black)
mil_fin3$all_stops_white <- ifelse(is.na(mil_fin3$all_stops_white), 0, mil_fin3$all_stops_white)
mil_fin3$all_stops_latino <- ifelse(is.na(mil_fin3$all_stops_latino), 0, mil_fin3$all_stops_latino)
mil_fin3$all_stops_asian <- ifelse(is.na(mil_fin3$all_stops_asian), 0, mil_fin3$all_stops_asian)
mil_fin3$all_stops_nonwhite <- ifelse(is.na(mil_fin3$all_stops_nonwhite), 0, mil_fin3$all_stops_nonwhite)


# ped stops
mil_fin3$ped_stops_total <- ifelse(is.na(mil_fin3$ped_stops_total), 0, mil_fin3$ped_stops_total)
mil_fin3$ped_stops_black <- ifelse(is.na(mil_fin3$ped_stops_black), 0, mil_fin3$ped_stops_black)
mil_fin3$ped_stops_white <- ifelse(is.na(mil_fin3$ped_stops_white), 0, mil_fin3$ped_stops_white)
mil_fin3$ped_stops_latino <- ifelse(is.na(mil_fin3$ped_stops_latino), 0, mil_fin3$ped_stops_latino)
mil_fin3$ped_stops_asian <- ifelse(is.na(mil_fin3$ped_stops_asian), 0, mil_fin3$ped_stops_asian)
mil_fin3$ped_stops_nonwhite <- ifelse(is.na(mil_fin3$ped_stops_nonwhite), 0, mil_fin3$ped_stops_nonwhite)

mil_stops_final = mil_fin3

# save
save(x = mil_stops_final, file = "mil_stops_final.RData")

load("mil_stops_final.RData")

# now create binned racial blv measures
mil_stops_final <- mil_stops_final %>% mutate(boundary_quart = quantile(mil_stops_final$p_race_white_blv, prob=.75, na.rm=TRUE),
                                              boundary_quart_dummy = ifelse(p_race_white_blv > boundary_quart,1,0 ))

# now run analyses

# log and +1 to variables (pop already logged)
mil_stops_final = mil_stops_final %>% 
  mutate(lmhhi = log(mhhi + 1),
         larrests = log(total_arrests + 1),
         lmisdemeanors = log(misdemeanor_arrests + 1),
         lfelonies = log(felony_arrests + 1),
         lnonviolent = log(nonviolent_arrests + 1),
         lviolent = log(violent_arrests + 1),
         lsociety = log(society_arrests + 1),
         lperson = log(person_arrests + 1),
         lproperty = log(property_arrests + 1),
         lcrime = log(crime_all + 1),
         lpropertycrime = log(crime_property + 1),
         lviolentcrime = log(crime_violent + 1))

# now for new stop vars
mil_stops_final = mil_stops_final %>%
  mutate(lall_stops_total = log(all_stops_total + 1),
         lall_stops_black = log(all_stops_black + 1),
         lall_stops_latino = log(all_stops_latino + 1),
         lall_stops_white = log(all_stops_white + 1),
         lall_stops_asian = log(all_stops_asian + 1),
         lall_stops_nonwhite = log(all_stops_nonwhite + 1),
         lped_stops_total = log(ped_stops_total + 1),
         lped_stops_black = log(ped_stops_black + 1),
         lped_stops_latino = log(ped_stops_latino + 1),
         lped_stops_white = log(ped_stops_white + 1),
         lped_stops_asian = log(ped_stops_asian + 1),
         lped_stops_nonwhite = log(ped_stops_nonwhite + 1))

# create scaled DVs (to account for different pop. sizes and density in blocks)
mil_stops_final$larrest_sd <- mil_stops_final$larrests - (mean(mil_stops_final$larrests)/sd(mil_stops_final$larrests))
mil_stops_final$lmisdemeanors_sd <- mil_stops_final$lmisdemeanors - (mean(mil_stops_final$lmisdemeanors)/sd(mil_stops_final$lmisdemeanors))
mil_stops_final$lfelonies_sd <- mil_stops_final$lfelonies - (mean(mil_stops_final$lfelonies)/sd(mil_stops_final$lfelonies))
mil_stops_final$lnonviolent_sd <- mil_stops_final$lnonviolent - (mean(mil_stops_final$lnonviolent)/sd(mil_stops_final$lnonviolent))
mil_stops_final$lviolent_sd <- mil_stops_final$lviolent - (mean(mil_stops_final$lviolent)/sd(mil_stops_final$lviolent))
mil_stops_final$lsociety_sd <- mil_stops_final$lsociety - (mean(mil_stops_final$lsociety)/sd(mil_stops_final$lsociety))
mil_stops_final$lperson_sd <- mil_stops_final$lperson - (mean(mil_stops_final$lperson)/sd(mil_stops_final$lperson))
mil_stops_final$lproperty_sd <- mil_stops_final$lproperty - (mean(mil_stops_final$lproperty)/sd(mil_stops_final$lproperty))

# create scaled DVs for stop vars
mil_stops_final$lall_stops_total_sd <- mil_stops_final$lall_stops_total - (mean(mil_stops_final$lall_stops_total)/sd(mil_stops_final$lall_stops_total))
mil_stops_final$lall_stops_black_sd <- mil_stops_final$lall_stops_black - (mean(mil_stops_final$lall_stops_black)/sd(mil_stops_final$lall_stops_black))
mil_stops_final$lall_stops_latino_sd <- mil_stops_final$lall_stops_latino - (mean(mil_stops_final$lall_stops_latino)/sd(mil_stops_final$lall_stops_latino))
mil_stops_final$lall_stops_white_sd <- mil_stops_final$lall_stops_white - (mean(mil_stops_final$lall_stops_white)/sd(mil_stops_final$lall_stops_white))
mil_stops_final$lall_stops_asian_sd <- mil_stops_final$lall_stops_asian - (mean(mil_stops_final$lall_stops_asian)/sd(mil_stops_final$lall_stops_asian))
mil_stops_final$lall_stops_nonwhite_sd <- mil_stops_final$lall_stops_nonwhite - (mean(mil_stops_final$lall_stops_nonwhite)/sd(mil_stops_final$lall_stops_nonwhite))

mil_stops_final$lped_stops_total_sd <- mil_stops_final$lped_stops_total - (mean(mil_stops_final$lped_stops_total)/sd(mil_stops_final$lped_stops_total))
mil_stops_final$lped_stops_black_sd <- mil_stops_final$lped_stops_black - (mean(mil_stops_final$lped_stops_black)/sd(mil_stops_final$lped_stops_black))
mil_stops_final$lped_stops_latino_sd <- mil_stops_final$lped_stops_latino - (mean(mil_stops_final$lped_stops_latino)/sd(mil_stops_final$lped_stops_latino))
mil_stops_final$lped_stops_white_sd <- mil_stops_final$lped_stops_white - (mean(mil_stops_final$lped_stops_white)/sd(mil_stops_final$lped_stops_white))
mil_stops_final$lped_stops_asian_sd <- mil_stops_final$lped_stops_asian - (mean(mil_stops_final$lped_stops_asian)/sd(mil_stops_final$lped_stops_asian))
mil_stops_final$lped_stops_nonwhite_sd <- mil_stops_final$lped_stops_nonwhite - (mean(mil_stops_final$lped_stops_nonwhite)/sd(mil_stops_final$lped_stops_nonwhite))


# save
save(x = mil_stops_final, file = "mil_stops_final.RData")

